From c17b1a485fa7ed44d5f6e519d567f52ae7fc5200 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Thu, 19 May 2005 12:36:18 +0000 Subject: [PATCH] bitkeeper revision 1.1462 (428c884242SexXIVbHWO-OkTk2Q95w) Clean up, fix, and rationalise RAM mapping in Xen. First, x86/64 must take care to map only registered RAM areas and not adjacent I/O holes -- otherwise a cpu may cache I/O space and cause coherency conflicts on the memory bus. Second, map_pages() and the memguard mechanisms are no longer sub-arch specific (moved to arch/x86/mm.c:map_pages_to_xen()). Signed-off-by: Keir Fraser --- xen/arch/x86/acpi/boot.c | 11 +- xen/arch/x86/boot/x86_64.S | 8 +- xen/arch/x86/domain.c | 4 +- xen/arch/x86/domain_build.c | 4 +- xen/arch/x86/mm.c | 178 ++++++++++++++++--------------- xen/arch/x86/setup.c | 55 +++++----- xen/arch/x86/x86_32/mm.c | 142 +++++-------------------- xen/arch/x86/x86_64/mm.c | 203 ++++++------------------------------ xen/include/asm-x86/mm.h | 8 +- xen/include/asm-x86/page.h | 18 +++- 10 files changed, 218 insertions(+), 413 deletions(-) diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c index 553a6631c9..79c35b8719 100644 --- a/xen/arch/x86/acpi/boot.c +++ b/xen/arch/x86/acpi/boot.c @@ -89,15 +89,18 @@ EXPORT_SYMBOL(x86_acpiid_to_apicid); */ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; -#ifdef CONFIG_X86_64 +#if 0/*def CONFIG_X86_64*/ /* rely on all ACPI tables being in the direct mapping */ char *__acpi_map_table(unsigned long phys_addr, unsigned long size) { if (!phys_addr || !size) - return NULL; - /* XEN: We map all e820 areas which should include every ACPI table. */ - return __va(phys_addr); + return NULL; + + if (phys_addr < (end_pfn_map << PAGE_SHIFT)) + return __va(phys_addr); + + return NULL; } #else diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S index 5ebb24d647..4be1c0684e 100644 --- a/xen/arch/x86/boot/x86_64.S +++ b/xen/arch/x86/boot/x86_64.S @@ -230,7 +230,7 @@ ENTRY(gdt_table) .quad 0x0000000000000000 /* unused */ .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */ -/* Initial PML4 -- level-4 page table */ +/* Initial PML4 -- level-4 page table. */ .org 0x2000 ENTRY(idle_pg_table) ENTRY(idle_pg_table_4) @@ -238,15 +238,15 @@ ENTRY(idle_pg_table_4) .fill 261,8,0 .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262] -/* Initial PDP -- level-3 page table */ +/* Initial PDP -- level-3 page table. */ .org 0x3000 ENTRY(idle_pg_table_l3) .quad idle_pg_table_l2 - __PAGE_OFFSET + 7 -/* Initial PDE -- level-2 page table. */ +/* Initial PDE -- level-2 page table. Maps first 64MB physical memory. 
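+   Built by the recursive identmap macro below: each expansion halves
+   \count until single entries remain, so "identmap from=0, count=32"
+   emits 32 superpage mappings spaced 0x200000 bytes apart, i.e.
+   exactly 32 x 2MB = 64MB.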
*/ .org 0x4000 ENTRY(idle_pg_table_l2) - .macro identmap from=0, count=512 + .macro identmap from=0, count=32 .if \count-1 identmap "(\from+0)","(\count/2)" identmap "(\from+(0x200000*(\count/2)))","(\count/2)" diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 458257a492..fa5343e3d2 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -264,7 +264,7 @@ void arch_do_createdomain(struct exec_domain *ed) ed->arch.perdomain_ptes = d->arch.mm_perdomain_pt; ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] = l1e_create_pfn(page_to_pfn(virt_to_page(gdt_table)), - __PAGE_HYPERVISOR); + PAGE_HYPERVISOR); ed->arch.guest_vtable = __linear_l2_table; ed->arch.shadow_vtable = __shadow_linear_l2_table; @@ -303,7 +303,7 @@ void arch_do_boot_vcpu(struct exec_domain *ed) d->arch.mm_perdomain_pt + (ed->vcpu_id << PDPT_VCPU_SHIFT); ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] = l1e_create_pfn(page_to_pfn(virt_to_page(gdt_table)), - __PAGE_HYPERVISOR); + PAGE_HYPERVISOR); } #ifdef CONFIG_VMX diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index 72ded37bdc..0bda826f7a 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -574,8 +574,8 @@ int construct_dom0(struct domain *d, // ASSERT( root_get_value(idle_pg_table[1]) == 0 ); ASSERT( pagetable_val(d->arch.phys_table) ); - idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table), - __PAGE_HYPERVISOR); + idle_pg_table[1] = root_create_phys( + pagetable_val(d->arch.phys_table), __PAGE_HYPERVISOR); translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT), pagetable_get_pfn(ed->arch.guest_table)); idle_pg_table[1] = root_empty(); diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index ccf8e3725a..720f73ec04 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -160,8 +160,8 @@ void __init init_frametable(void) p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20); if ( p == 0 ) panic("Not enough memory for frame table\n"); - map_pages(idle_pg_table, FRAMETABLE_VIRT_START + i, p, - 4UL << 20, PAGE_HYPERVISOR); + map_pages_to_xen( + FRAMETABLE_VIRT_START + i, p, 4UL << 20, PAGE_HYPERVISOR); } memset(frame_table, 0, frame_table_size); @@ -2833,101 +2833,113 @@ void ptwr_destroy(struct domain *d) free_xenheap_page((unsigned long)d->arch.ptwr[PTWR_PT_INACTIVE].page); } +/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */ +int map_pages_to_xen( + unsigned long v, + unsigned long p, + unsigned long s, + unsigned long flags) +{ + l2_pgentry_t *pl2e, ol2e; + l1_pgentry_t *pl1e; + unsigned int i; + unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES); + flags &= ~MAP_SMALL_PAGES; -/************************************************************************/ -/************************************************************************/ -/************************************************************************/ + while ( s != 0 ) + { + pl2e = virt_to_xen_l2e(v); -/* Graveyard: stuff below may be useful in future. 
*/ -#if 0 - case MMUEXT_TRANSFER_PAGE: - domid = (domid_t)(val >> 16); - gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF)); - - if ( unlikely(IS_XEN_HEAP_FRAME(page)) || - unlikely(!pfn_valid(pfn)) || - unlikely((e = find_domain_by_id(domid)) == NULL) ) + if ( (((v|p) & ((1 << L2_PAGETABLE_SHIFT) - 1)) == 0) && + (s >= (1 << L2_PAGETABLE_SHIFT)) && + !map_small_pages ) { - MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid); - okay = 0; - break; - } - - spin_lock(&d->page_alloc_lock); + /* Super-page mapping. */ + ol2e = *pl2e; + *pl2e = l2e_create_phys(p, flags|_PAGE_PSE); - /* - * The tricky bit: atomically release ownership while there is just one - * benign reference to the page (PGC_allocated). If that reference - * disappears then the deallocation routine will safely spin. - */ - _d = pickle_domptr(d); - _nd = page->u.inuse._domain; - y = page->count_info; - do { - x = y; - if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != - (1|PGC_allocated)) || - unlikely(_nd != _d) ) + if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) ) { - MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p," - " caf=%08x, taf=%08x\n", page_to_pfn(page), - d, d->domain_id, unpickle_domptr(_nd), x, - page->u.inuse.type_info); - spin_unlock(&d->page_alloc_lock); - put_domain(e); - return 0; + local_flush_tlb_pge(); + if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) ) + free_xen_pagetable(l2e_get_page(*pl2e)); } - __asm__ __volatile__( - LOCK_PREFIX "cmpxchg8b %2" - : "=d" (_nd), "=a" (y), - "=m" (*(volatile u64 *)(&page->count_info)) - : "0" (_d), "1" (x), "c" (NULL), "b" (x) ); - } - while ( unlikely(_nd != _d) || unlikely(y != x) ); - /* - * Unlink from 'd'. At least one reference remains (now anonymous), so - * noone else is spinning to try to delete this page from 'd'. - */ - d->tot_pages--; - list_del(&page->list); - - spin_unlock(&d->page_alloc_lock); + v += 1 << L2_PAGETABLE_SHIFT; + p += 1 << L2_PAGETABLE_SHIFT; + s -= 1 << L2_PAGETABLE_SHIFT; + } + else + { + /* Normal page mapping. */ + if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) + { + pl1e = page_to_virt(alloc_xen_pagetable()); + clear_page(pl1e); + *pl2e = l2e_create_phys(__pa(pl1e), __PAGE_HYPERVISOR); + } + else if ( l2e_get_flags(*pl2e) & _PAGE_PSE ) + { + pl1e = page_to_virt(alloc_xen_pagetable()); + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + pl1e[i] = l1e_create_pfn( + l2e_get_pfn(*pl2e) + i, + l2e_get_flags(*pl2e) & ~_PAGE_PSE); + *pl2e = l2e_create_phys(__pa(pl1e), __PAGE_HYPERVISOR); + local_flush_tlb_pge(); + } - spin_lock(&e->page_alloc_lock); + pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v); + if ( (l1e_get_flags(*pl1e) & _PAGE_PRESENT) ) + local_flush_tlb_one(v); + *pl1e = l1e_create_phys(p, flags); - /* - * Check that 'e' will accept the page and has reservation headroom. - * Also, a domain mustn't have PGC_allocated pages when it is dying. - */ - ASSERT(e->tot_pages <= e->max_pages); - if ( unlikely(test_bit(_DOMF_dying, &e->domain_flags)) || - unlikely(e->tot_pages == e->max_pages) || - unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) ) - { - MEM_LOG("Transferee has no reservation headroom (%d,%d), or " - "provided a bad grant ref, or is dying (%p).\n", - e->tot_pages, e->max_pages, e->flags); - spin_unlock(&e->page_alloc_lock); - put_domain(e); - okay = 0; - break; + v += 1 << L1_PAGETABLE_SHIFT; + p += 1 << L1_PAGETABLE_SHIFT; + s -= 1 << L1_PAGETABLE_SHIFT; } + } - /* Okay, add the page to 'e'. 
*/ - if ( unlikely(e->tot_pages++ == 0) ) - get_knownalive_domain(e); - list_add_tail(&page->list, &e->page_list); - page_set_owner(page, e); + return 0; +} - spin_unlock(&e->page_alloc_lock); +#ifdef MEMORY_GUARD + +void memguard_init(void) +{ + map_pages_to_xen( + PAGE_OFFSET, 0, xenheap_phys_end, __PAGE_HYPERVISOR|MAP_SMALL_PAGES); +} + +static void __memguard_change_range(void *p, unsigned long l, int guard) +{ + unsigned long _p = (unsigned long)p; + unsigned long _l = (unsigned long)l; + unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES; + + /* Ensure we are dealing with a page-aligned whole number of pages. */ + ASSERT((_p&PAGE_MASK) != 0); + ASSERT((_l&PAGE_MASK) != 0); + ASSERT((_p&~PAGE_MASK) == 0); + ASSERT((_l&~PAGE_MASK) == 0); + + if ( guard ) + flags &= ~_PAGE_PRESENT; + + map_pages_to_xen((unsigned long)(_p), __pa(_p), _l, flags); +} + +void memguard_guard_range(void *p, unsigned long l) +{ + __memguard_change_range(p, l, 1); +} + +void memguard_unguard_range(void *p, unsigned long l) +{ + __memguard_change_range(p, l, 0); +} - /* Transfer is all done: tell the guest about its new page frame. */ - gnttab_notify_transfer(e, d, gntref, pfn); - - put_domain(e); - break; #endif /* diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 54a5670214..0998b207f5 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -58,7 +58,8 @@ boolean_param("noapic", skip_ioapic_setup); int early_boot = 1; -unsigned long xenheap_phys_end; +/* Limits of Xen heap, used to initialise the allocator. */ +unsigned long xenheap_phys_start, xenheap_phys_end; extern void arch_init_memory(void); extern void init_IRQ(void); @@ -396,10 +397,9 @@ static void __init start_of_day(void) arch_do_createdomain(current); /* Map default GDT into their final position in the idle page table. */ - map_pages( - idle_pg_table, + map_pages_to_xen( GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE, - virt_to_phys(gdt_table), PAGE_SIZE, __PAGE_HYPERVISOR); + virt_to_phys(gdt_table), PAGE_SIZE, PAGE_HYPERVISOR); /* Process CPU type information. */ identify_cpu(&boot_cpu_data); @@ -473,7 +473,6 @@ void __init __start_xen(multiboot_info_t *mbi) { char *cmdline; module_t *mod = (module_t *)__va(mbi->mods_addr); - void *heap_start; unsigned long firsthole_start, nr_pages; unsigned long initial_images_start, initial_images_end; struct e820entry e820_raw[E820MAX]; @@ -561,8 +560,7 @@ void __init __start_xen(multiboot_info_t *mbi) #endif /* Initialise boot-time allocator with all RAM situated after modules. */ - heap_start = memguard_init(&_end); - heap_start = __va(init_boot_allocator(__pa(heap_start))); + xenheap_phys_start = init_boot_allocator(__pa(&_end)); nr_pages = 0; for ( i = 0; i < e820.nr_map; i++ ) { @@ -573,22 +571,31 @@ void __init __start_xen(multiboot_info_t *mbi) init_boot_pages((e820.map[i].addr < initial_images_end) ? initial_images_end : e820.map[i].addr, e820.map[i].addr + e820.map[i].size); - } - #if defined (CONFIG_X86_64) - /* On x86/64 we can 1:1 map every registered memory area. */ - /* We use the raw_e820 map because we sometimes truncate the cooked map. 
*/ - for ( i = 0; i < e820_raw_nr; i++ ) - { - unsigned long min, sz; - min = (unsigned long)e820_raw[i].addr & - ~(((unsigned long)L1_PAGETABLE_ENTRIES << PAGE_SHIFT) - 1); - sz = ((unsigned long)e820_raw[i].size + - ((unsigned long)L1_PAGETABLE_ENTRIES << PAGE_SHIFT) - 1) & - ~(((unsigned long)L1_PAGETABLE_ENTRIES << PAGE_SHIFT) - 1); - map_pages(idle_pg_table, PAGE_OFFSET + min, min, sz, PAGE_HYPERVISOR); - } + /* + * x86/64 maps all registered RAM. Points to note: + * 1. The initial pagetable already maps low 64MB, so skip that. + * 2. We must map *only* RAM areas, taking care to avoid I/O holes. + * Failure to do this can cause coherency problems and deadlocks + * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug). + */ + { + unsigned long start = (unsigned long)e820.map[i].addr; + unsigned long size = (unsigned long)e820.map[i].size; + size = (size + (start & ~PAGE_MASK) + PAGE_SIZE - 1) & PAGE_MASK; + if ( (start &= PAGE_MASK) < (64UL << 20) ) + { + if ( (signed long)(size -= (64UL << 20) - start) <= 0 ) + continue; + start = 64UL << 20; + } + map_pages_to_xen( + PAGE_OFFSET + start, start, size, PAGE_HYPERVISOR); + } #endif + } + + memguard_init(); printk("System RAM: %luMB (%lukB)\n", nr_pages >> (20 - PAGE_SHIFT), @@ -598,10 +605,10 @@ void __init __start_xen(multiboot_info_t *mbi) end_boot_allocator(); - init_xenheap_pages(__pa(heap_start), xenheap_phys_end); + init_xenheap_pages(xenheap_phys_start, xenheap_phys_end); printk("Xen heap: %luMB (%lukB)\n", - (xenheap_phys_end-__pa(heap_start)) >> 20, - (xenheap_phys_end-__pa(heap_start)) >> 10); + (xenheap_phys_end-xenheap_phys_start) >> 20, + (xenheap_phys_end-xenheap_phys_start) >> 10); early_boot = 0; diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 045d904c21..744ba23193 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -27,55 +27,31 @@ #include #include -/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */ -#define __PTE_MASK (~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_PCD|_PAGE_PWT)) -int map_pages( - root_pgentry_t *pt, - unsigned long v, - unsigned long p, - unsigned long s, - unsigned long flags) +struct pfn_info *alloc_xen_pagetable(void) { - l2_pgentry_t *pl2e; - l1_pgentry_t *pl1e; - void *newpg; + extern int early_boot; + extern unsigned long xenheap_phys_start; + struct pfn_info *pg; - while ( s != 0 ) + if ( !early_boot ) { - pl2e = &pt[l2_table_offset(v)]; - - if ( ((s|v|p) & ((1<= __end_of_fixed_addresses) ) BUG(); - map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags); + map_pages_to_xen(fix_to_virt(idx), p, PAGE_SIZE, flags); } - void __init paging_init(void) { void *ioremap_pt; @@ -122,9 +97,10 @@ void __init paging_init(void) * can reused this address space for their phys-to-machine mapping. */ idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] = - l2e_create_pfn(l2e_get_pfn(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]), - l2e_get_flags(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) - & ~(_PAGE_RW | _PAGE_GLOBAL)); + l2e_create_pfn( + l2e_get_pfn(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]), + l2e_get_flags(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) + & ~(_PAGE_RW | _PAGE_GLOBAL)); /* Set up mapping cache for domain pages. */ mapcache = (l1_pgentry_t *)alloc_xenheap_page(); @@ -196,7 +172,6 @@ long do_stack_switch(unsigned long ss, unsigned long esp) return 0; } - /* Returns TRUE if given descriptor is valid for GDT or LDT. 
*/ int check_descriptor(struct desc_struct *d) { @@ -299,78 +274,11 @@ int check_descriptor(struct desc_struct *d) return 0; } - -#ifdef MEMORY_GUARD - -void *memguard_init(void *heap_start) -{ - l1_pgentry_t *l1; - int i, j; - - /* Round the allocation pointer up to a page boundary. */ - heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) & - PAGE_MASK); - - /* Memory guarding is incompatible with super pages. */ - for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ ) - { - l1 = (l1_pgentry_t *)heap_start; - heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE); - for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ ) - l1[j] = l1e_create_phys((i << L2_PAGETABLE_SHIFT) | - (j << L1_PAGETABLE_SHIFT), - __PAGE_HYPERVISOR); - idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] = - l2e_create_phys(virt_to_phys(l1), __PAGE_HYPERVISOR); - } - - return heap_start; -} - -static void __memguard_change_range(void *p, unsigned long l, int guard) -{ - l1_pgentry_t *l1; - l2_pgentry_t *l2; - unsigned long _p = (unsigned long)p; - unsigned long _l = (unsigned long)l; - - /* Ensure we are dealing with a page-aligned whole number of pages. */ - ASSERT((_p&PAGE_MASK) != 0); - ASSERT((_l&PAGE_MASK) != 0); - ASSERT((_p&~PAGE_MASK) == 0); - ASSERT((_l&~PAGE_MASK) == 0); - - while ( _l != 0 ) - { - l2 = &idle_pg_table[l2_table_offset(_p)]; - l1 = l2e_to_l1e(*l2) + l1_table_offset(_p); - if ( guard ) - l1e_remove_flags(l1, _PAGE_PRESENT); - else - l1e_add_flags(l1, _PAGE_PRESENT); - _p += PAGE_SIZE; - _l -= PAGE_SIZE; - } -} - void memguard_guard_stack(void *p) { memguard_guard_range(p, PAGE_SIZE); } -void memguard_guard_range(void *p, unsigned long l) -{ - __memguard_change_range(p, l, 1); - local_flush_tlb(); -} - -void memguard_unguard_range(void *p, unsigned long l) -{ - __memguard_change_range(p, l, 0); -} - -#endif - /* * Local variables: * mode: C diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index f3c686750f..532ad606ce 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -29,95 +29,47 @@ #include #include -static void *safe_page_alloc(void) +struct pfn_info *alloc_xen_pagetable(void) { extern int early_boot; - if ( early_boot ) - { - unsigned long p = alloc_boot_pages(PAGE_SIZE, PAGE_SIZE); - if ( p == 0 ) - goto oom; - return phys_to_virt(p); - } - else - { - struct pfn_info *pg = alloc_domheap_page(NULL); - if ( pg == NULL ) - goto oom; - return page_to_virt(pg); - } - oom: - panic("Out of memory"); - return NULL; + unsigned long p; + + if ( !early_boot ) + return alloc_domheap_page(NULL); + + p = alloc_boot_pages(PAGE_SIZE, PAGE_SIZE); + return ((p == 0) ? NULL : phys_to_page(p)); } -/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. 
*/ -#define __PTE_MASK (~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_PCD|_PAGE_PWT)) -int map_pages( - root_pgentry_t *pt, - unsigned long v, - unsigned long p, - unsigned long s, - unsigned long flags) +void free_xen_pagetable(struct pfn_info *pg) +{ + free_domheap_page(pg); +} + +l2_pgentry_t *virt_to_xen_l2e(unsigned long v) { l4_pgentry_t *pl4e; l3_pgentry_t *pl3e; l2_pgentry_t *pl2e; - l1_pgentry_t *pl1e; - void *newpg; - while ( s != 0 ) + pl4e = &idle_pg_table[l4_table_offset(v)]; + if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) { - pl4e = &pt[l4_table_offset(v)]; - if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) - { - newpg = safe_page_alloc(); - clear_page(newpg); - *pl4e = l4e_create_phys(__pa(newpg), flags & __PTE_MASK); - } - - pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v); - if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) - { - newpg = safe_page_alloc(); - clear_page(newpg); - *pl3e = l3e_create_phys(__pa(newpg), flags & __PTE_MASK); - } - - pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v); - - if ( ((s|v|p) & ((1<= __end_of_fixed_addresses) ) BUG(); - map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags); + map_pages_to_xen(fix_to_virt(idx), p, PAGE_SIZE, flags); } void __init paging_init(void) @@ -145,8 +97,9 @@ void __init paging_init(void) if ( pg == NULL ) panic("Not enough memory for m2p table\n"); p = page_to_phys(pg); - map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p, - 1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR | _PAGE_USER); + map_pages_to_xen( + RDWR_MPT_VIRT_START + i*8, p, + 1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR | _PAGE_USER); memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55, 1UL << L2_PAGETABLE_SHIFT); } @@ -331,100 +284,12 @@ int check_descriptor(struct desc_struct *d) return 0; } - -#ifdef MEMORY_GUARD - -#define ALLOC_PT(_level) \ -do { \ - (_level) = (_level ## _pgentry_t *)heap_start; \ - heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE); \ - clear_page(_level); \ -} while ( 0 ) -void *memguard_init(void *heap_start) -{ - l1_pgentry_t *l1 = NULL; - l2_pgentry_t *l2 = NULL; - l3_pgentry_t *l3 = NULL; - l4_pgentry_t *l4 = &idle_pg_table[l4_table_offset(PAGE_OFFSET)]; - unsigned long i, j; - - /* Round the allocation pointer up to a page boundary. */ - heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) & - PAGE_MASK); - - /* Memory guarding is incompatible with super pages. */ - for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ ) - { - ALLOC_PT(l1); - for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ ) - l1[j] = l1e_create_phys((i << L2_PAGETABLE_SHIFT) | - (j << L1_PAGETABLE_SHIFT), - __PAGE_HYPERVISOR); - if ( !((unsigned long)l2 & (PAGE_SIZE-1)) ) - { - ALLOC_PT(l2); - if ( !((unsigned long)l3 & (PAGE_SIZE-1)) ) - { - ALLOC_PT(l3); - *l4++ = l4e_create_phys(virt_to_phys(l3), __PAGE_HYPERVISOR); - } - *l3++ = l3e_create_phys(virt_to_phys(l2), __PAGE_HYPERVISOR); - } - *l2++ = l2e_create_phys(virt_to_phys(l1), __PAGE_HYPERVISOR); - } - - return heap_start; -} - -static void __memguard_change_range(void *p, unsigned long l, int guard) -{ - l1_pgentry_t *l1; - l2_pgentry_t *l2; - l3_pgentry_t *l3; - l4_pgentry_t *l4; - unsigned long _p = (unsigned long)p; - unsigned long _l = (unsigned long)l; - - /* Ensure we are dealing with a page-aligned whole number of pages. 
*/ - ASSERT((_p&PAGE_MASK) != 0); - ASSERT((_l&PAGE_MASK) != 0); - ASSERT((_p&~PAGE_MASK) == 0); - ASSERT((_l&~PAGE_MASK) == 0); - - while ( _l != 0 ) - { - l4 = &idle_pg_table[l4_table_offset(_p)]; - l3 = l4e_to_l3e(*l4) + l3_table_offset(_p); - l2 = l3e_to_l2e(*l3) + l2_table_offset(_p); - l1 = l2e_to_l1e(*l2) + l1_table_offset(_p); - if ( guard ) - l1e_remove_flags(l1, _PAGE_PRESENT); - else - l1e_add_flags(l1, _PAGE_PRESENT); - _p += PAGE_SIZE; - _l -= PAGE_SIZE; - } -} - void memguard_guard_stack(void *p) { p = (void *)((unsigned long)p + PAGE_SIZE); memguard_guard_range(p, 2 * PAGE_SIZE); } -void memguard_guard_range(void *p, unsigned long l) -{ - __memguard_change_range(p, l, 1); - local_flush_tlb(); -} - -void memguard_unguard_range(void *p, unsigned long l) -{ - __memguard_change_range(p, l, 0); -} - -#endif - /* * Local variables: * mode: C diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index bbab724e48..e8a9617980 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -275,17 +275,17 @@ static inline unsigned long phys_to_machine_mapping(unsigned long pfn) #define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn) #ifdef MEMORY_GUARD -void *memguard_init(void *heap_start); -void memguard_guard_stack(void *p); +void memguard_init(void); void memguard_guard_range(void *p, unsigned long l); void memguard_unguard_range(void *p, unsigned long l); #else -#define memguard_init(_s) (_s) -#define memguard_guard_stack(_p) ((void)0) +#define memguard_init() ((void)0) #define memguard_guard_range(_p,_l) ((void)0) #define memguard_unguard_range(_p,_l) ((void)0) #endif +void memguard_guard_stack(void *p); + /* Writable Pagetables */ struct ptwr_info { /* Linear address where the guest is updating the p.t. page. */ diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index 64b0635ce6..93018113a1 100644 --- a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -38,6 +38,11 @@ typedef struct { unsigned long pt_lo; } pagetable_t; #define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) #define pfn_valid(_pfn) ((_pfn) < max_page) +#define l1e_get_page(_x) (pfn_to_page(l1e_get_pfn(_x))) +#define l2e_get_page(_x) (pfn_to_page(l2e_get_pfn(_x))) +#define l3e_get_page(_x) (pfn_to_page(l3e_get_pfn(_x))) +#define l4e_get_page(_x) (pfn_to_page(l4e_get_pfn(_x))) + /* High table entries are reserved by the hypervisor. */ #define DOMAIN_ENTRIES_PER_L2_PAGETABLE \ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) @@ -131,10 +136,15 @@ static __inline__ int get_order(unsigned long size) return order; } -/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */ -extern int -map_pages( - root_pgentry_t *pt, +/* Allocator functions for Xen pagetables. */ +struct pfn_info *alloc_xen_pagetable(void); +void free_xen_pagetable(struct pfn_info *pg); +l2_pgentry_t *virt_to_xen_l2e(unsigned long v); + +/* Map physical byte range (@p, @p+@s) at address @v in Xen address space. */ +#define MAP_SMALL_PAGES (1UL<<16) /* don't use superpages for the mapping */ +int +map_pages_to_xen( unsigned long v, unsigned long p, unsigned long s, -- 2.30.2
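
The notes below are editorial sketches illustrating the mechanisms in this
patch; none of this text or code is part of the patch itself.

Note 1: path selection in map_pages_to_xen(). The mapping loop takes the
2MB-superpage path only when the virtual and physical cursors are both
2MB-aligned, at least 2MB of the request remains, and the caller did not
pass MAP_SMALL_PAGES; otherwise it maps 4kB pages, first shattering any
existing superpage into a freshly allocated L1 table. A minimal standalone
model of the cursor arithmetic (constants mirror 4kB/2MB x86 paging; no
pagetables are touched):

    #include <stdio.h>

    #define L1_SHIFT        12              /* 4kB page */
    #define L2_SHIFT        21              /* 2MB superpage */
    #define MAP_SMALL_PAGES (1UL << 16)     /* as in asm-x86/page.h */

    static void walk(unsigned long v, unsigned long p, unsigned long s,
                     unsigned long flags)
    {
        int small = !!(flags & MAP_SMALL_PAGES);
        while ( s != 0 )
        {
            if ( !small &&
                 (((v | p) & ((1UL << L2_SHIFT) - 1)) == 0) &&
                 (s >= (1UL << L2_SHIFT)) )
            {
                printf("  2MB superpage: v=%#lx -> p=%#lx\n", v, p);
                v += 1UL << L2_SHIFT; p += 1UL << L2_SHIFT;
                s -= 1UL << L2_SHIFT;
            }
            else
            {
                printf("  4kB page:      v=%#lx -> p=%#lx\n", v, p);
                v += 1UL << L1_SHIFT; p += 1UL << L1_SHIFT;
                s -= 1UL << L1_SHIFT;
            }
        }
    }

    int main(void)
    {
        /* 2MB+8kB starting 4kB below a 2MB boundary: one 4kB page up to
         * the boundary, one superpage, then a 4kB tail. */
        walk(0x1ff000, 0x1ff000, (2UL << 20) + 0x2000, 0);
        return 0;
    }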
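
Note 2: the memguard mechanism. The per-sub-arch memguard pagetable builders
are gone: memguard_init() now simply remaps the Xen heap through
map_pages_to_xen() with MAP_SMALL_PAGES (guarding needs per-4kB granularity,
so superpages must be avoided), and __memguard_change_range() re-maps a range
with _PAGE_PRESENT cleared to guard it, or set to unguard it, so that any
access to a guarded page faults. A sketch of the flag computation (the flag
values are illustrative x86 ones; only bit 0, _PAGE_PRESENT, matters here):

    #define _PAGE_PRESENT     0x001UL
    #define __PAGE_HYPERVISOR 0x063UL        /* present|rw|accessed|dirty */
    #define MAP_SMALL_PAGES   (1UL << 16)

    /* As in __memguard_change_range(): same flags either way, except that
     * a guarded range is mapped not-present so any touch page-faults. */
    unsigned long memguard_flags(int guard)
    {
        unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
        if ( guard )
            flags &= ~_PAGE_PRESENT;
        return flags;
    }

Only memguard_guard_stack() remains sub-arch specific: x86/32 guards the
single page at p, while x86/64 skips one page and guards the next two.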
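
Note 3: the x86/64 e820 clamping in __start_xen(). The old code 1:1-mapped
every raw e820 area rounded out to 2MB granularity, which could pull adjacent
I/O holes into the cacheable direct map (the coherency hazard named in the
changelog). The new loop maps only RAM entries from the cooked e820 map,
rounded out to 4kB pages, and skips the low 64MB already covered by the boot
pagetables. A runnable model of the clamping arithmetic (the e820 entry
values are made up):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long start = 0x3ffe800UL;   /* hypothetical e820 RAM entry */
        unsigned long size  = 0x80003000UL;  /* ~2GB, not page-aligned */

        /* Round the range out to whole pages, as in the patch. */
        size = (size + (start & ~PAGE_MASK) + PAGE_SIZE - 1) & PAGE_MASK;
        if ( (start &= PAGE_MASK) < (64UL << 20) )
        {
            if ( (signed long)(size -= (64UL << 20) - start) <= 0 )
                return 0;                    /* entirely below 64MB: skip */
            start = 64UL << 20;              /* clamp to the 64MB boundary */
        }
        /* The patch would now do:
         * map_pages_to_xen(PAGE_OFFSET + start, start, size, PAGE_HYPERVISOR); */
        printf("map %#lx..%#lx\n", start, start + size);
        return 0;
    }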
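
Note 4: the pagetable-page allocator split. map_pages_to_xen() obtains
intermediate tables from a new per-sub-arch alloc_xen_pagetable() returning a
struct pfn_info *: on x86/64, before the heap allocators are live (early_boot)
it draws physical pages from the boot allocator and converts with
phys_to_page(); afterwards it uses alloc_domheap_page(NULL). A toy model of
the dispatch (both allocators below are stand-ins, not the Xen APIs):

    #include <stdio.h>

    static int early_boot = 1;

    static unsigned long stub_boot_alloc(void)    { return 0x00100000UL; }
    static unsigned long stub_domheap_alloc(void) { return 0x08000000UL; }

    static unsigned long alloc_pagetable_page(void)
    {
        if ( !early_boot )
            return stub_domheap_alloc();   /* heap is up: use it */
        return stub_boot_alloc();          /* else: boot allocator */
    }

    int main(void)
    {
        printf("early: page at %#lx\n", alloc_pagetable_page());
        early_boot = 0;   /* cleared in setup.c once the allocators are live */
        printf("late:  page at %#lx\n", alloc_pagetable_page());
        return 0;
    }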